# get_oil_and_gas_data.R
#
# downloads EIA data on crude oil and natural gas production by
# state, along with state level wellhead prices for crud oil and citygate
# natural gas prices as a proxy for natural gas well head prices. together these
# data are used to determine the share of oil and gas extraction value that is
# due to natural gas, relative to crude oil.
#
# second, downloads census data on state level international imports and exports
# of crude oil and natural gas to determine the share of oil and gas extraction
# trade due to natural gas.
#
# these state level data are aggregate to the model regions and stored in the
# file "build/data/oil_and_gas_data.gms" which is used by
# "build/disaggregate_oil_and_gas.gms" which disaggregates oil and gas
# extraction activities into seperate crude oil and natural gas sectors
#



#------------------------------------------------------------------------------
# set general parameters
# ------------------------------------------------------------------------------

# year of the benchmark
year = 2016

# output file for gams parameter data
output.file = file.path("build","data","oil_and_gas_data.gms")



# ------------------------------------------------------------------------------
# load libraries
# ------------------------------------------------------------------------------

# load in the R utilities for sage
source(file.path("utilities","R_utilities.R"))



# ------------------------------------------------------------------------------
# get the crude oil trade data
# ------------------------------------------------------------------------------

# create a data frame with all state abbreviations -> merging the downloaded
# census data with this ensures that all states are represented even if the
# value is zero, also gets rid of us teritories in the census data
states = data.frame(state=tolower(state.abb))

# harmonized system code for crude oil
cru.hs.commodity = 270900

# download crude oil exports by state of origin
temp = merge(states, get.trade.data(cru.hs.commodity,"exports"), all.x=TRUE)
names(temp)[1] = "r"
temp$value[is.na(temp$value)] = 0
temp$s = "cru"
temp = temp[,c(1,3,2)]

# write data to the gams input file
write.gams.variable(output.file,"x_og",temp,
                    comments="crude oil international exports")

# download crude oil imports by state of destination
temp = merge(states, get.trade.data(cru.hs.commodity,"imports"), all.x=TRUE)
names(temp)[1] = "r"
temp$value[is.na(temp$value)] = 0
temp$s = "cru"
temp = temp[,c(1,3,2)]

# write data to the gams input file
write.gams.variable(output.file,"m_og",temp,append=TRUE,
                    comments="crude oil international imports")



# ------------------------------------------------------------------------------
# get the natural gas trade data
# ------------------------------------------------------------------------------

# harmonized system code for natural gas liquids and gases -> need to use both
# to cover the cases where the gas is tanked or piped
gas.hs.commodity = c(271111,271121)

# download natural gas exports by state of origin
temp1 = merge(states, get.trade.data(gas.hs.commodity[1],"exports"), all.x=TRUE)
names(temp1)[1] = "r"
temp1$value[is.na(temp1$value)] = 0
temp2 = merge(states, get.trade.data(gas.hs.commodity[2],"exports"), all.x=TRUE)
names(temp2)[1] = "r"
temp2$value[is.na(temp2$value)] = 0
temp = merge(temp1,temp2,by="r")
temp$value = rowSums(temp[,c(2,3)])
temp = temp[,-c(2,3)]
temp$s = "gas"
temp = temp[,c(1,3,2)]

# write data to the gams input file
write.gams.variable(output.file,"x_og",temp,append=TRUE,
                    comments="natural gas international exports")

# download natural gas imports by state of destination
temp1 = merge(states, get.trade.data(gas.hs.commodity[1],"imports"), all.x=TRUE)
names(temp1)[1] = "r"
temp1$value[is.na(temp1$value)] = 0
temp2 = merge(states, get.trade.data(gas.hs.commodity[2],"imports"), all.x=TRUE)
names(temp2)[1] = "r"
temp2$value[is.na(temp2$value)] = 0
temp = merge(temp1,temp2,by="r")
temp$value = rowSums(temp[,c(2,3)])
temp = temp[,-c(2,3)]
names(temp) = c("r","value")
temp$s = "gas"
temp = temp[,c(1,3,2)]

# write data to the gams input file
write.gams.variable(output.file,"m_og",temp,append=TRUE,
                    comments="natural gas international imports")



# ------------------------------------------------------------------------------
# get crude oil production
# ------------------------------------------------------------------------------

# load info with the sate fips and padd codes
state.info = read.csv(file.path("build","data","state_info.csv"),
                      stringsAsFactors=FALSE)

# function to add a preceding zero to a single digit number
add.zeros = function(x) if (nchar(x)==1) paste0("0",x) else x

# setup a container for the data
eia.data = NULL

# intialize space for the average padd prices
padds = unique(state.info$padd)
padd.price = data.frame(padd=padds,value=NA)

for (padd in padds) {

  # series id for domestic crude oil first purchase price by state
  if (substr(padd,1,1)=="1")
    series.id = "PET.F001000__3.A"
  else
    series.id = paste0("PET.F00",padd,"00__3.A")

  # download the eia data
  price = get.eia(series.id)

  # save the average padd price
  padd.price$value[padd.price$padd==padd] = price$value[price$year==year]

}

for (state in state.abb) {

  # for alaska use the north slope fips code otherwise use the regulare code
  if (state=="AK")
    fips = 71
  else
    fips = add.zeros(state.info$fips[state.info$state==state])

  # series id for domestic crude oil first purchase price by state
  series.id = paste0("PET.F00",state.info$padd[state.info$state==state],fips,
                     "__3.A")

  # NY price data is witheld - therefore use continental average
  if (state=="NY")
    series.id = "PET.F009960__3.A"
  
  # download the eia data
  price = get.eia(series.id)

  # if the state has data keep only the benchmark year
  if (nrow(price)>0) {
    price = price[price$year==year,]
    price$state = tolower(state)
  } else {
    price = data.frame(year=year,
                       value=padd.price$value[padd.price$padd==state.info$padd[state.info$state==state]],
                       state=tolower(state))
  }

  # series id for crude oil production
  series.id = paste0("PET.MCRFP",state,"1.A")

  # download the eia data
  quantity = get.eia(series.id)

  # if the state has data store the production value in the benchmark year
  if (nrow(quantity)>0 & nrow(price)>0) {
    quantity = quantity[quantity$year==year,]
    price$value = price$value*quantity$value*1000
#    price$value = quantity$value
    eia.data = rbind(eia.data,price[,-1])
  } else {
    print(paste0("Warning: Oil production value not available for ",state))
  }

}

# convert to billions of dollars
eia.data$value = eia.data$value*1e-9

# aggregate to the model regions
temp = eia.data
names(temp)[2] = "r"
temp$s = "cru"
temp = temp[,c(2,3,1)]

# write data to the gams input file
write.gams.variable(output.file,"y_og",temp,append=TRUE,
                    comments="crude oil output")



# ------------------------------------------------------------------------------
# get natural gas production
# ------------------------------------------------------------------------------

eia.data = NULL

for (state in state.abb) {

  # series id for gross natural gas city gate price (used as an approximation
  # to the wellhead price for the sate)
  series.id = paste0("NG.N3050",state,"3.A")

  # download the eia data
  price = get.eia(series.id)

  if (nrow(price)>0) {
    price = price[price$year==year,]
    price$state = tolower(state)
  }

  # series id for gross natural gas withdrawls
  series.id = paste0("NG.N9010",state,"2.A")

  # download the eia data
  quantity = get.eia(series.id)

  if (nrow(quantity)>0 & nrow(price)>0) {
    quantity = quantity[quantity$year==year,]
    price$value = price$value*quantity$value*1000
#    price$value = quantity$value
    eia.data = rbind(eia.data,price[,-1])
  } else {
    print(paste0("Warning: Gas production value not available for ",state))
  }

}

# convert to billions of dollars
eia.data$value = eia.data$value*1e-9

# aggregate to the model regions and save the data
temp = eia.data
names(temp)[2] = "r"
temp$s = "gas"
temp = temp[,c(2,3,1)]

# write data to the gams input file
write.gams.variable(output.file,"y_og",temp,append=TRUE,
                    comments="natural gas output")
